******************************************************************************************
/*	December 28, 2020
	Replication run using Stata 16
	
	Gonzales, León-Ciliotta and Martínez, "How Effective Are Monetary Incentives to Vote? Evidence from a Nationwide Policy"
	This file replicates Table 7 and Figure 6, Appendix Figures G1-G3
	
	Note: The regression for turnout in Chile used in Figure 6 uses a random sample of 15 million observations out of the universe of voters (28.7 million observations) in the first and second round of the 2017 presidential elections. Some computers may require adjustments to the maximum number of variables (set maxvar) or the maximum matrix size (set matsize) to use this sample. Even then, some computers may simply be unable to use a dataset of this size. For convenience, we have written the code to use a random sample of voters of a size to be determined by the user using the local "size" in line 27. Larger samples are computationally more demanding but yield results that are more precise and closer to the ones in this paper, which use a random sample of 15 million observations.  Samples of 10 million observations or more should provide results very similar to the ones reported in the paper.                                       		
*/
******************************************************************************************

clear
clear matrix
clear mata
*set maxvar  32767 
*set matsize 11000
* The three path globals below are relative, so they resolve against the directory passed to cd.
*global dir "" // If running this do-file on its own, first run program "Stata_setup" and then programs "Data_senior" & "Data_Chile" to create the necessary CLEAN data files, and use global dir to specify the correct path.
global P_INTERMEDIATE "Replication_files/Data/INTERMEDIATE"
global P_CLEAN "Replication_files/Data/CLEAN"
global P_RESULTS "Replication_files/Results"
* Move to the folder stored in global dir (it must be defined by the caller while the line above stays commented out).
cd "${dir}"
* Load the CLEAN election dataset that stays in memory for the Table 7 regressions below.
use "$P_CLEAN/elecciones_2016full",clear
* The file header refers to the next line by its line number, so keep its position when editing this section.
local size=15000000 //Change this size as preferred. This local specifies the size of the random sample to use in the estimation for the Chilean dataset. The results in the published paper use a random sample of 15 million observations (local size = 15000000)

******************************************************************************************
*Table 7: Senior Exemption from Compulsory Voting and Voter Turnout
******************************************************************************************
* Shared outreg2 call reused for every column of Table 7.
* The output path uses forward slashes, which Stata accepts as directory
* separators on every platform (and which match the other paths in this file).
global out outreg2 using "$P_RESULTS/Tables/Table7.tex", bracket nocons keep(sh_* yrs) nor2 dec(3)

* Each column follows the same recipe:
*   1. weighted reghdfe with standard errors clustered by ubigeo;
*   2. M1 = first cell of the e(dof_table) matrix, reported as "Districts";
*   3. weighted mean of the dependent variable over the estimation sample;
*   4. tab local_id leaves the number of distinct values in r(r), reported as "Polling stations";
*   5. the within R-squared is reported under the label "R-squared".
* NOTE(review): M1 is presumably the number of categories of the FIRST absorbed
* variable. In column 3 that variable is local_id rather than ubigeo, so the
* "Districts" statistic of that column should be double-checked.

* Column 1: baseline, ubigeo and round fixed effects.
reghdfe turnout fraction_16-fraction_68 sh_70_75 fraction_76-fraction_122  [aweight=electores], absorb(ubigeo round) cluster(ubigeo)
mat M = e(dof_table)
local M1 = M[1,1]
sum `e(depvar)' if e(sample) [aweight=electores]
local temp: di %4.3fc r(mean)
tab local_id if e(sample),nofreq
${out} replace addstat(Districts, `M1', Polling stations, `r(r)', Mean of dep. var, `temp', R-squared, e(r2_within)) addtext(District FE, Yes, Polling station FE, No, Election type FE, Yes, Share by age, Yes) tex(frag)

* Column 2: same specification, restricted to observations with electores between 280 and 300.
reghdfe turnout fraction_16-fraction_68 sh_70_75 fraction_76-fraction_122 if electores>=280&electores<=300 [aweight=electores], absorb(ubigeo round) cluster(ubigeo)
mat M = e(dof_table)
local M1 = M[1,1]
sum `e(depvar)' if e(sample) [aweight=electores]
local temp: di %4.3fc r(mean)
tab local_id if e(sample),nofreq
${out} addstat(Districts, `M1',Polling stations, `r(r)', Mean of dep. var, `temp', R-squared, e(r2_within)) addtext(District FE, Yes, Polling station FE, No, Election type FE, Yes, Share by age, Yes) tex(frag)

* Column 3: local_id fixed effects replace the ubigeo fixed effects.
reghdfe turnout fraction_16-fraction_68 sh_70_75 fraction_76-fraction_122 [aweight=electores], absorb(local_id round) cluster(ubigeo)
mat M = e(dof_table)
local M1 = M[1,1]
sum `e(depvar)' if e(sample) [aweight=electores]
local temp: di %4.3fc r(mean)
tab local_id if e(sample),nofreq
${out} addstat(Districts, `M1',Polling stations, `r(r)', Mean of dep. var, `temp', R-squared, e(r2_within)) addtext(District FE, No, Polling station FE, Yes, Election type FE, Yes, Share by age, Yes) tex(frag)

* Column 4: narrower treated share (sh_70_72), with separate age shares from 73 onwards.
reghdfe turnout fraction_16-fraction_68 sh_70_72 fraction_73-fraction_122 [aweight=electores], absorb(ubigeo round) cluster(ubigeo)
mat M = e(dof_table)
local M1 = M[1,1]
sum `e(depvar)' if e(sample) [aweight=electores]
local temp: di %4.3fc r(mean)
tab local_id if e(sample),nofreq
${out} addstat(Districts, `M1',Polling stations, `r(r)', Mean of dep. var, `temp', R-squared, e(r2_within)) addtext(District FE, Yes, Polling station FE, No, Election type FE, Yes, Share by age, Yes) tex(frag)

* Column 5: yrs_wo_elec takes the place of the sh_70_75 regressor.
reghdfe turnout fraction_16-fraction_68 yrs_wo_elec fraction_76-fraction_122  [aweight=electores], absorb(ubigeo round) cluster(ubigeo)
mat M = e(dof_table)
local M1 = M[1,1]
sum `e(depvar)' if e(sample) [aweight=electores]
local temp: di %4.3fc r(mean)
tab local_id if e(sample),nofreq
${out} addstat(Districts, `M1',Polling stations, `r(r)', Mean of dep. var, `temp', R-squared, e(r2_within)) addtext(District FE, Yes, Polling station FE, No, Election type FE, Yes, Share by age, Yes) tex(frag)

* Column 6: adds sh_70_75_runoff and reports the p-value of the test that both coefficients sum to zero.
reghdfe turnout fraction_16-fraction_68 sh_70_75 sh_70_75_runoff fraction_76-fraction_122  [aweight=electores], absorb(ubigeo round) cluster(ubigeo)
test sh_70_75+ sh_70_75_runoff==0 
* The p-value is stored right away because the sum and tab calls below overwrite r().
local temp1: di %4.3fc r(p) 
mat M = e(dof_table)
local M1 = M[1,1]
sum `e(depvar)' if e(sample) [aweight=electores]
local temp: di %4.3fc r(mean)
tab local_id if e(sample),nofreq
${out} addstat(Districts, `M1',Polling stations, `r(r)', Mean of dep. var, `temp', R-squared, e(r2_within)) addtext(District FE, Yes, Polling station FE, No, Election type FE, Yes, Share by age, Yes, p-value a+b=0,"`temp1'") tex(frag)

******************************************************************************************
*Figure 6: Senior Exemption from Compulsory Voting and Voter Turnout
******************************************************************************************
* Event-study regression: one coefficient per age share, with age 69 left out
* of the regressor list so that it acts as the reference age.
* NOTE(review): the intermediate-file paths keep their backslash separators
* because the same files are read and erased in other sections of this file;
* switch all of them to forward slashes in one coordinated change.
preserve
reghdfe turnout fraction_16-fraction_68 fraction_70-fraction_122  [aweight=electores], absorb(ubigeo round) cluster(ubigeo)
parmest,saving("$P_INTERMEDIATE\event_70",replace)
restore

*Panel A Peru
preserve
use "$P_INTERMEDIATE\event_70.dta",clear
* Rows 3 to 64 are presumably the coefficients for ages 18 to 80 (age 69 excluded),
* given the order of the regressors above -- confirm against the parmest output.
keep in 3/64
* One extra empty row stands in for the omitted reference age.
set obs 63
* Its missing estimate and confidence bounds become 0.
mvencode estimate min95 max95,mv(0)
* Age is read from the last two characters of the parameter name.
gen age=substr(parm,-2,2)
destring age,replace
* The added row has no parameter name, so its age is missing: label it as 69.
replace age=69 if missing(age)
twoway (rcap min95 max95 age, lcolor(gs7)) (scatter estimate age,msymbol(diamond) msize(small) mcolor(olive)), ///
ytitle(Voter turnout (relative to age 69)) ylabel(, angle(horizontal) format(%2.1f)) xtitle(Age) xline(69.5) ///
legend(order(2 "Point estimate" 1 "95% confidence interval") rows(1)) ///
graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
plotregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) name(event_peru_full,replace)
* Forward slashes are accepted by Stata on every platform and match the other paths in this file.
graph export "$P_RESULTS/Figures/Figure6_a.pdf", as(pdf) replace
restore

preserve
*Panel B: Chile + Peru

use "$P_CLEAN/chile.dta",clear
* Shuffle the rows so that the first observations form a random sample.
* NOTE(review): no seed is set in this file, so the sample drawn depends on the
* state of the random-number generator when this line runs; consider setting a
* seed upstream if exactly repeatable output is required.
generate random = runiform()
sort random
* Cap the requested sample size at the number of rows available. Without the cap
* the in-range below aborts when local size exceeds the size of the dataset.
local n_use = min(`size', _N)
* Turnout on age dummies with age 69 as the base category, district and runoff fixed effects.
reghdfe voto ib69.age in 1/`n_use', absorb(district runoff) 
parmest,saving("$P_INTERMEDIATE\turnout_chile_2017",replace)
restore

* Stack the Peru estimates first and the Chile estimates after them; the plotting
* sections below select rows of this file by position.
preserve
use "$P_INTERMEDIATE\event_70.dta",clear
append using "$P_INTERMEDIATE\turnout_chile_2017.dta"
save "$P_INTERMEDIATE\event_70_full.dta",replace
restore

* Figure 6, Panel B: Peru and Chile estimates for ages 60 to 80 on one plot.
* NOTE(review): rows are selected by position, which presumably relies on the
* stacked file holding the Peru rows first and the Chile rows after them --
* confirm the row layout if either regression specification changes.
preserve
use "$P_INTERMEDIATE\event_70_full",clear
* Drop the tail of the Peru rows; the first 83 rows are flagged as Peru.
drop in 84/107
gen peru=(_n<=83) 
* Trim the Chile rows, then add one empty row (number 166).
drop if _n>=166
set obs 166
* Missing estimates and bounds become 0 (the added row and any omitted base category).
mvencode estimate min95 max95,mv(0) over
* Peru parameter names end with the age; Chile parameter names start with it.
gen age=substr(parm,-2,2) if peru==1
replace age=substr(parm,1,2) if peru==0
destring age,replace
* The added row has no parameter name: it becomes the Peru reference age 69.
replace age=69 if missing(age)
replace peru=1 if _n==166
* Shift the two series slightly apart so that their markers do not overlap.
gen age_draw=age
replace age_draw=age-0.1 if peru==1
replace age_draw=age+0.1 if peru==0
twoway (scatter estimate age_draw if peru==0,msymbol(square) mcolor(olive_teal) msize(small)) ///
(rcap min95 max95 age_draw, lcolor(gs7)) ///
(scatter estimate age_draw if peru==1,msymbol(diamond) msize(small) mcolor(olive))   if age>=60&age<=80, ///
ytitle(Voter turnout (relative to age 69)) ylabel(, angle(horizontal) format(%2.1f)) xtitle(Age) xline(69.5) ///
legend(order(3 "Peru (Compulsory voting until age 70)" 1 "Chile (No compulsory voting)") rows(1)) ///
text(-0.47 59.8 "Aggr. turnout Peru = 81%",placement(e) size(small)) ///
text(-0.52 59.75 "Aggr. turnout Chile = 48% (65% at age 69)",placement(e) size(small)) ///
graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
plotregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) name(event_peru_chile_60_80,replace)
* Forward slashes are accepted by Stata on every platform and match the other paths in this file.
graph export "$P_RESULTS/Figures/Figure6_b.pdf", as(pdf) replace
restore

******************************************************************************************
*Figure G1: Share of Booths without Voters from each Age
******************************************************************************************
* Figure G1: for each age, the share of rows (round 1 only) whose age share is exactly zero.
preserve
keep if round==1
* Indicator equal to 1 when the share of age x is zero; it stays missing when the share is missing.
forvalues x=18/90{
    gen cero_`x'=fraction_`x'==0 if fraction_`x'<.
}
* Row counter: after the collapse it holds the total number of rows.
gen uno=1
fcollapse (sum) cero_* uno
* Turn the counts of zero-share rows into shares of all rows.
forvalues x=18/90{
    replace cero_`x'=cero_`x'/uno
}
* One row per age (edad) for plotting.
reshape long cero_, i(uno) j(edad)
twoway (scatter cero_ edad if edad<=80), ytitle(Share of booths without voters) ylabel(, angle(horizontal)) xtitle(Age) xline(70, lcolor(red)) xlabel(20(10)80) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) plotregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) name(share_zero,replace)
* Forward slashes are accepted by Stata on every platform and match the other paths in this file.
graph export "$P_RESULTS/Figures/FigureG1.pdf", as(pdf) replace
restore

******************************************************************************************
*Figure G2: Distribution of Registered voters by age in Peru and Chile
******************************************************************************************
* Figure G2, Panel A: percentage of registered voters at each age from 60 to 80 (round 1 only).
preserve
keep if round==1
* Mean of every age share, weighted by electores.
collapse (mean) fraction*   [aweight= electores ]
* Constant identifier so the single collapsed row can be reshaped to one row per age.
gen i=1
reshape long fraction_, i(i) j(age)
* Express the shares as percentages.
replace fraction_=fraction_*100
rename fraction_ share
twoway bar share age if inrange(age,60,80), barwidth(1) bstyle(histogram) ///
ytitle(Percentage of registered voters) xlabel(60(1)80) ylabel(0(0.5)1.5, angle(horizontal)) xtitle(Age)  ///
graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
plotregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
xline(70) scheme(sj) name(histogram_70, replace) 
* Forward slashes are accepted by Stata on every platform and match the other paths in this file.
graph export "$P_RESULTS/Figures/FigureG2_a.pdf", as(pdf) replace
restore

* Figure G2, Panel B: percentage of registered voters at each age from 60 to 80 in the Chilean data.
preserve
use "$P_CLEAN/chile.dta",clear
* Keep the rows with runoff equal to 0 only.
keep if runoff ==0
* Count the non-missing values of voto at each age. The variable name is written
* in full so the command does not depend on variable abbreviation being enabled;
* the next command refers to the collapsed variable as voto.
fcollapse (count) voto, by(age)
* Hard-coded denominator for the percentage.
* NOTE(review): presumably the total number of registered voters in this round -- confirm.
local total = 14347288
gen share = voto*100/`total'
keep share age
label var share "Percentage of registered voters by age"

* The variables h and x generated here are not used by the plot below.
twoway__histogram_gen age, width(1) discrete percent gen(h x) 
twoway bar share age if inrange(age,60,80), barwidth(1) bstyle(histogram) ///
ytitle(Percentage of registered voters) xlabel(60(1)80) ylabel(0(0.5)1.5, angle(horizontal)) xtitle(Age)  ///
graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
plotregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
xline(70) scheme(sj) name(histogram_70, replace) 
* Forward slashes are accepted by Stata on every platform and match the other paths in this file.
graph export "$P_RESULTS/Figures/FigureG2_b.pdf", as(pdf) replace
restore

******************************************************************************************
*Figure G3: Voter Turnout by Age in Peru and Chile: Full Distribution
******************************************************************************************
* Figure G3: Peru and Chile estimates over the full age range kept below.
* NOTE(review): rows are selected by position, which presumably relies on the
* stacked file holding the Peru rows first and the Chile rows after them --
* confirm the row layout if either regression specification changes.
preserve
use "$P_INTERMEDIATE\event_70_full",clear
drop in 1/2
drop in 63/105
drop if _n>125
* Drop rows without a t statistic.
drop if t==.
* Pad the data to 126 rows; the padding supplies the row for the Peru reference age.
set obs 126
* Missing estimates and confidence bounds become 0.
mvencode estimate min95 max95,mv(0)
* The first 62 rows take the age from the end of the parameter name, later rows from its start.
gen age=substr(parm,-2,2) if _n<63
replace age=substr(parm,1,2) if _n>=63

destring age,replace
* Rows without a parameter name have a missing age: label them as 69.
replace age=69 if missing(age)
* Peru is the first 62 rows plus row 126; all other rows are Chile.
gen peru=(_n<=62|_n==126)
* Shift the two series slightly apart so that their markers do not overlap.
gen age_draw=age
replace age_draw=age-0.1 if peru==1
replace age_draw=age+0.1 if peru==0
twoway (scatter estimate age_draw if peru==0,msymbol(square) mcolor(olive_teal) msize(small)) (rcap min95 max95 age_draw, lcolor(gs7)) (scatter estimate age_draw if peru==1,msymbol(diamond) msize(small) mcolor(olive)), ///
legend(order(3 "Peru (Compulsory voting until age 70)" 1 "Chile (No compulsory voting)") rows(1)) ///
ytitle(Voter turnout (relative to age 69)) ylabel(, angle(horizontal) format(%2.1f)) xtitle(Age) xline(69.5) ///
graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
plotregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) name(event_peru_chile_full,replace)
* Forward slashes are accepted by Stata on every platform and match the other paths in this file.
graph export "$P_RESULTS/Figures/FigureG3.pdf", as(pdf) replace
restore

* Clean-up: delete the three intermediate estimate files written earlier in this do-file.
* Every figure that reads them must have been produced before this point.
erase "$P_INTERMEDIATE\event_70.dta"
erase "$P_INTERMEDIATE\turnout_chile_2017.dta"
erase "$P_INTERMEDIATE\event_70_full.dta"